library(randomForest)
library(Metrics)
library(knitr)
library(ggplot2)
library(plyr)
library(dplyr)
library(corrplot)
library(caret)
library(gridExtra)
library(scales)
library(Rmisc)
library(ggrepel)
library(randomForest)
library(psych)
library(xgboost)
# Load the serialized training and test sets from the working directory.
# (The earlier note on this step attributes the data to pre-processing.Rmd.)
# Both objects are printed in full for inspection.
train <- readRDS(file = "train.rds")
test <- readRDS(file = "test.rds")
print(train)
print(test)
# Hold-out split of `train`: the first 80% of rows (in stored order, no
# shuffling) become `train_train`; the remaining rows become `test_train`.
#
# The cut point is floored to a whole number of rows. Using the raw product
# 0.8 * nrow(train) as a range bound makes the second range start at a
# fractional index whenever the product is not an integer; R then truncates
# every index in that range and the final row of `train` is silently lost.
# seq_len() also keeps both ranges well-defined when `train` has zero rows.
n_rows <- nrow(train)
n_fit <- floor(0.8 * n_rows)
train_train <- train[seq_len(n_fit), , drop = FALSE]
test_train <- train[n_fit + seq_len(n_rows - n_fit), , drop = FALSE]
print(train_train)
print(test_train)
# Fit a random forest on the training split and score the hold-out split.
# Column 230 is passed as the response; every other column is a predictor.
# The seed is fixed immediately before fitting so the forest is reproducible.
set.seed(2018)
model <- randomForest(
  x = train_train[, -230],
  y = train_train[, 230],
  ntree = 1000,
  importance = TRUE  # store permutation importance for the chart below
)
predictions <- predict(model, newdata = test_train[, -230])
# Auto-print the fitted model summary.
model
Call:
randomForest(x = train_train[, -230], y = train_train[, 230], ntree = 1000, importance = TRUE)
Type of random forest: regression
Number of trees: 1000
No. of variables tried at each split: 76
Mean of squared residuals: 0.01777153
% Var explained: 88.97
# Evaluation of results on the hold-out split: correlation between the
# forest's predictions and the observed values in column 230.
cor(x = predictions, y = test_train[, 230])
[1] 0.9359404
# Root-mean-squared error of the hold-out predictions, on the same scale as
# the stored response in column 230.
rmse(actual = test_train[, 230], predicted = predictions)
[1] 0.1383465
# Visualizing the results: scatter of actual against predicted values for
# the hold-out split. Both vectors are passed through exp() before plotting,
# which presumes the response in column 230 is stored on a natural-log scale
# -- TODO confirm against the pre-processing step.
plot(
  exp(predictions), exp(test_train[, 230]),
  xlab = "Predicted Label",
  ylab = "Actual Label",
  main = "Plot of Actual Against Predicted Labels"
)
# Least-squares fit of actual on predicted values, drawn in blue over the
# range of the data.
lin.mod <- lm(exp(test_train[, 230]) ~ exp(predictions))
pr.lm <- predict(lin.mod)
lines(pr.lm ~ exp(predictions), col = "blue", lwd = 0.5)
# Reference line y = x (perfect prediction). abline() spans the whole plot
# region, whereas a fixed segment from 0 to 450000 would stop short whenever
# the plotted values exceed that bound.
abline(a = 0, b = 1)
legend(
  "topleft",
  legend = c("fitted line", "45 degree line"),
  col = c("blue", "black"),
  lty = 1,
  cex = 0.8
)

# Display importance chart: horizontal bar chart of the (up to) 20 predictors
# with the largest permutation importance in the fitted forest.
# The seed call is retained from the original; nothing in this block appears
# to draw random numbers.
set.seed(2018)
imp_RF <- importance(model)
# Select the permutation measure by name rather than by position so a change
# in column layout fails loudly instead of silently plotting another measure.
imp_DF <- data.frame(
  Variables = row.names(imp_RF),
  MSE = imp_RF[, "%IncMSE"]
)
imp_DF <- imp_DF[order(imp_DF$MSE, decreasing = TRUE), ]
# head() returns at most 20 rows; indexing with 1:20 would pad the frame with
# all-NA rows when the model has fewer than 20 predictors.
ggplot(head(imp_DF, 20), aes(x = reorder(Variables, MSE), y = MSE, fill = MSE)) +
  geom_bar(stat = 'identity') +
  labs(x = 'Variables', y = '% increase MSE if variable is randomly permuted') +
  coord_flip() +
  theme(legend.position = "none")

LS0tDQp0aXRsZTogInJhbmRvbSBmb3Jlc3QiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkocmFuZG9tRm9yZXN0KQ0KbGlicmFyeShNZXRyaWNzKQ0KbGlicmFyeShrbml0cikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocGx5cikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KGNvcnJwbG90KQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoZ3JpZEV4dHJhKQ0KbGlicmFyeShzY2FsZXMpDQpsaWJyYXJ5KFJtaXNjKQ0KbGlicmFyeShnZ3JlcGVsKQ0KbGlicmFyeShyYW5kb21Gb3Jlc3QpDQpsaWJyYXJ5KHBzeWNoKQ0KbGlicmFyeSh4Z2Jvb3N0KQ0KYGBgDQoNCmBgYHtyfQ0KI3JlYWQgaW4gYWxsLnJkcyhvdXRwdXQgZnJvbSBwcmUtcHJvY2Vzc2luZy5SbWQpDQp0cmFpbj1yZWFkUkRTKCJ0cmFpbi5yZHMiKQ0KdGVzdD1yZWFkUkRTKCJ0ZXN0LnJkcyIpDQpwcmludCh0cmFpbikNCnByaW50KHRlc3QpDQpgYGANCg0KYGBge3J9DQojdHJhaW46dGVzdCAtPiA4MCU6MjAlDQp0cmFpbl90cmFpbj10cmFpblsxOigwLjgqZGltKHRyYWluKVsxXSksXQ0KdGVzdF90cmFpbj10cmFpblsoMC44KmRpbSh0cmFpbilbMV0rMSk6KGRpbSh0cmFpbilbMV0pLF0NCnByaW50KHRyYWluX3RyYWluKQ0KcHJpbnQodGVzdF90cmFpbikNCmBgYA0KDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMjAxOCkNCm1vZGVsPXJhbmRvbUZvcmVzdCh4PXRyYWluX3RyYWluWywtMjMwXSwgeT10cmFpbl90cmFpblssMjMwXSwgbnRyZWU9MTAwMCxpbXBvcnRhbmNlPVRSVUUpDQpwcmVkaWN0aW9ucz1wcmVkaWN0KG1vZGVsLHRlc3RfdHJhaW5bLC0yMzBdKQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWwNCmBgYA0KDQpgYGB7cn0NCiNldmFsdWF0aW9uIG9mIHJlc3VsdHMNCmNvcihwcmVkaWN0aW9ucyx0ZXN0X3RyYWluWywyMzBdKQ0Kcm1zZSh0ZXN0X3RyYWluWywyMzBdLHByZWRpY3Rpb25zKQ0KYGBgDQoNCmBgYHtyfQ0KI3Zpc3VhbGl6aW5nIHRoZSByZXN1bHRzDQpwbG90KGV4cChwcmVkaWN0aW9ucyksZXhwKHRlc3RfdHJhaW5bLDIzMF0pLHhsYWI9IlByZWRpY3RlZCBMYWJlbCIseWxhYj0iQWN0dWFsIExhYmVsIixtYWluPSJQbG90IG9mIEFjdHVhbCBBZ2FpbnN0IFByZWRpY3RlZCBMYWJlbHMiKQ0KbGluLm1vZD1sbShleHAodGVzdF90cmFpblssMjMwXSl+ZXhwKHByZWRpY3Rpb25zKSkNCnByLmxtPXByZWRpY3QobGluLm1vZCkNCmxpbmVzKHByLmxtfmV4cChwcmVkaWN0aW9ucyksIGNvbD0iYmx1ZSIsIGx3ZD0wLjUpDQpsaW5lcyhjKDAsNDUwMDAwKSwgYygwLDQ1MDAwMCkpDQoNCmxlZ2VuZCgidG9wbGVmdCIsIGxlZ2VuZD1jKCJmaXR0ZWQgbGluZSIsICI0NSBkZWdyZWUgbGluZSIpLGNvbD1jKCJibHVlIiwgImJsYWNrIiksIGx0eT0xLCBjZXg9MC44KQ0KYGBgDQoNCmBgYHtyfQ0KI2Rpc3BsYXkgaW1wb3J0YW5jZSBjaGFydA0Kc2V0LnNlZWQoMjAxOCkNCmltcF9SRj1pbXBvcnRhbmNlKG1vZGVsKQ0KaW1wX0RGPWRhdGEu
ZnJhbWUoVmFyaWFibGVzID0gcm93Lm5hbWVzKGltcF9SRiksIE1TRSA9IGltcF9SRlssMV0pDQppbXBfREY9aW1wX0RGW29yZGVyKGltcF9ERiRNU0UsIGRlY3JlYXNpbmcgPSBUUlVFKSxdDQoNCmdncGxvdChpbXBfREZbMToyMCxdLCBhZXMoeD1yZW9yZGVyKFZhcmlhYmxlcywgTVNFKSwgeT1NU0UsIGZpbGw9TVNFKSkgKyBnZW9tX2JhcihzdGF0ID0gJ2lkZW50aXR5JykgKyBsYWJzKHggPSAnVmFyaWFibGVzJywgeT0gJyUgaW5jcmVhc2UgTVNFIGlmIHZhcmlhYmxlIGlzIHJhbmRvbWx5IHBlcm11dGVkJykgKyBjb29yZF9mbGlwKCkgKyB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKQ0KYGBgDQo=